# Import the restaurant CSV from the local desktop.
data1 <- read.csv("/Users/lin/Desktop/Restaurant.csv")
head(data1)
class(data1$cuisines)
#> [1] "factor"
a <- as.character(data1$cuisines)

# Identify which restaurants are vegetarian: the flag is TRUE when the
# cuisines text contains "Vegetarian".
library("stringr")
vege <- str_detect(a, "Vegetarian")
data2 <- cbind(vege, data1)

# Recode the logical flag as 0/1 so it can be filtered on below.
class(data2$vege)
#> [1] "logical"
data2$vege <- as.numeric(data2$vege)

# Keep only the vegetarian restaurants.
library(dplyr)
#> Registered S3 method overwritten by 'dplyr':
#> method from
#> print.rowwise_df
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> filter, lag
#> The following objects are masked from ‘package:base’:
#> intersect, setdiff, setequal, union
data3 <- subset(data2, vege == 1)

# Keep the columns needed for mapping and drop every row that has an NA in
# any of them (the county lookup further down needs complete coordinates).
data4 <- data3 %>%
  select(
    id, city, name, latitude, longitude,
    phones, paymentTypes, postalCode
  ) %>%
  na.omit()

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(data4)
}
library(sp)
library(maps)
library(maptools)
#> Checking rgeos availability: TRUE
# Look up the US county polygon that contains each point.
#
# Args:
#   pointsDF: a data.frame in which
#     - column 1 contains the longitude in degrees (negative in the US)
#     - column 2 contains the latitude in degrees
#
# Returns:
#   A character vector with one element per row of pointsDF. Each element is
#   the ID of the matching polygon from the maps 'county' database, or NA when
#   no polygon contains the point or the row has a missing coordinate.
#   An input with zero rows yields character(0).
latlong2county <- function(pointsDF) {
  if (NCOL(pointsDF) < 2L) {
    stop("pointsDF must have longitude and latitude columns", call. = FALSE)
  }

  n_points <- NROW(pointsDF)
  if (n_points == 0L) {
    return(character(0))
  }

  # SpatialPoints() rejects missing coordinates, so those rows are skipped
  # and reported as NA instead of aborting the whole lookup.
  county_of_point <- rep(NA_character_, n_points)
  has_coords <- stats::complete.cases(pointsDF)
  if (!any(has_coords)) {
    return(county_of_point)
  }

  # Prepare a SpatialPolygons object with one SpatialPolygon per county.
  # maps::map is namespaced because other packages also export a map().
  counties <- maps::map("county", fill = TRUE, col = "transparent",
                        plot = FALSE)
  IDs <- vapply(strsplit(counties$names, ":"), function(x) x[1], character(1))
  wgs84 <- sp::CRS("+proj=longlat +datum=WGS84")
  counties_sp <- maptools::map2SpatialPolygons(counties, IDs = IDs,
                                               proj4string = wgs84)

  # Convert the usable rows of pointsDF to a SpatialPoints object.
  pointsSP <- sp::SpatialPoints(pointsDF[has_coords, , drop = FALSE],
                                proj4string = wgs84)

  # Use 'over' to get _indices_ of the polygons containing each point.
  indices <- sp::over(pointsSP, counties_sp)

  # Translate the indices to polygon IDs; an NA index stays NA.
  countyNames <- vapply(counties_sp@polygons, function(x) x@ID, character(1))
  county_of_point[has_coords] <- countyNames[indices]
  county_of_point
}
# Look up the county of every vegetarian restaurant from its coordinates.
# latlong2county() expects longitude in column 1 and latitude in column 2.
testPoints <- data.frame(x = data4$longitude, y = data4$latitude)
county_list <- latlong2county(testPoints)
county_list_data <- as.data.frame(county_list)

# Append the lookup result to data4 as a column named county_list.
data5 <- cbind(data4, county_list_data)

# Drop rows containing NA, e.g. restaurants for which no county was found.
data5 <- data5 %>%
  na.omit()
unique(data5$county_list)
#> [1] new york,chautauqua new york,albany new york,saratoga
#> [4] new york,rensselaer new york,washington new york,ulster
#> [7] new york,warren new york,westchester new york,kings
#> [10] new york,queens new york,new york new york,cayuga
#> [13] new york,bronx new york,dutchess new york,nassau
#> [16] new york,ontario new york,erie new york,suffolk
#> [19] new york,onondaga new york,columbia new york,franklin
#> [22] new york,monroe new york,orange new york,oneida
#> [25] new york,essex new york,otsego new york,greene
#> [28] new york,putnam new york,niagara new york,delaware
#> [31] new york,rockland new york,clinton new york,montgomery
#> [34] new york,broome new york,tompkins new york,seneca
#> [37] new york,sullivan new york,madison new york,st lawrence
#> [40] new york,steuben new york,chemung new york,herkimer
#> [43] new york,chenango new york,schenectady new york,hamilton
#> [46] new york,livingston new york,jefferson new york,tioga
#> [49] new york,cortland new york,yates new york,fulton
#> [52] new york,wayne new york,schuyler new york,orleans
#> [55] new york,cattaraugus new york,genesee new york,lewis
#> [58] new york,oswego
#> 58 Levels: new york,albany new york,bronx ... new york,yates

# Clean the county names of the NY vegetarian restaurant data with a
# regular expression.
county_name <- as.character(data5$county_list)
# Remove everything up to and including the last ",", which leaves only the
# county part of "state,county".
county_name2 <- gsub(".*,", "", county_name)
data6 <- cbind(county_name2, data5)
data6$county_list <- NULL
data6
unique(data6$county_name2)
#> [1] chautauqua albany saratoga rensselaer washington ulster
#> [7] warren westchester kings queens new york cayuga
#> [13] bronx dutchess nassau ontario erie suffolk
#> [19] onondaga columbia franklin monroe orange oneida
#> [25] essex otsego greene putnam niagara delaware
#> [31] rockland clinton montgomery broome tompkins seneca
#> [37] sullivan madison st lawrence steuben chemung herkimer
#> [43] chenango schenectady hamilton livingston jefferson tioga
#> [49] cortland yates fulton wayne schuyler orleans
#> [55] cattaraugus genesee lewis oswego
#> 58 Levels: albany bronx broome cattaraugus cayuga chautauqua ... yates
# Import the 2016 county-level presidential election results.
eleccounty <- read.csv(
  "/Users/lin/Desktop/2016_US_County_Level_Presidential_Results.csv"
)
eleccounty$X1 <- NULL
eleccounty <- eleccounty %>%
  filter(state_abbr == "NY") # there are 62 counties in NY State

# Normalise the election county names so they can be matched against the
# restaurant data.
foo <- as.character(eleccounty$county_name)
# Remove the last word and the whitespace in front of it (judging by the
# output below, this strips a trailing " County" -- confirm against the CSV).
foo2 <- gsub("\\s*\\w*$", "", foo)
foo3 <- tolower(foo2)
# fixed() matches the text literally; as a regex the "." would match any
# character.
foo4 <- str_replace(foo3, fixed("st. lawrence"), "st lawrence")
foo4
#> [1] "albany" "allegany" "bronx" "broome" "cattaraugus"
#> [6] "cayuga" "chautauqua" "chemung" "chenango" "clinton"
#> [11] "columbia" "cortland" "delaware" "dutchess" "erie"
#> [16] "essex" "franklin" "fulton" "genesee" "greene"
#> [21] "hamilton" "herkimer" "jefferson" "kings" "lewis"
#> [26] "livingston" "madison" "monroe" "montgomery" "nassau"
#> [31] "new york" "niagara" "oneida" "onondaga" "ontario"
#> [36] "orange" "orleans" "oswego" "otsego" "putnam"
#> [41] "queens" "rensselaer" "richmond" "rockland" "saratoga"
#> [46] "schenectady" "schoharie" "schuyler" "seneca" "st lawrence"
#> [51] "steuben" "suffolk" "sullivan" "tioga" "tompkins"
#> [56] "ulster" "warren" "washington" "wayne" "westchester"
#> [61] "wyoming" "yates"

# Attach the cleaned names as the join key county_name2.
new_eleccounty <- cbind(eleccounty, county_name2 = foo4)

# Work out who won each county in 2016. A positive GOP-minus-Dem margin is
# labelled "Republican"; everything else, including an exact tie, is
# labelled "Democrat".
new_eleccounty <- new_eleccounty %>%
  mutate(difference_in_vote = votes_gop - votes_dem) %>%
  mutate(R.D = ifelse(difference_in_vote > 0, "Republican", "Democrat"))
# Combine the cleaned NY vegetarian restaurant data with the 2016 county
# election data. The left outer join keeps every restaurant row.
# The join key is converted to character on both sides first. Joining the
# raw columns made dplyr warn about "joining factors with different levels"
# and coerce them to character anyway, so the result is the same without the
# warning.
df <- left_join(
  mutate(data6, county_name2 = as.character(county_name2)),
  mutate(new_eleccounty, county_name2 = as.character(county_name2)),
  by = "county_name2"
)

# Show the literal text "NA" in place of empty payment types, then store the
# column as a factor again.
df$paymentTypes <- as.character(df$paymentTypes)
df$paymentTypes[df$paymentTypes == ""] <- "NA"
df$paymentTypes <- as.factor(df$paymentTypes)
# Make a leaflet map of the vegetarian restaurants in NY State.
library(leaflet)
#> Registered S3 method overwritten by 'htmlwidgets':
#> method from
#> print.htmlwidget tools:rstudio
library(RColorBrewer)

# Two-colour palette keyed on the winning party.
# NOTE(review): presumably "Democrat" maps to blue and "Republican" to red
# because the levels are taken in sorted order -- confirm on the legend.
pal <- colorFactor(palette = c("blue", "red"), domain = df$R.D)
color_vote <- pal(df$R.D)

# HTML popup text, one string per restaurant.
content <- paste(
  "ID:", df$id, "<br/>",
  "County:", df$county_name, "<br/>",
  "Restaurant Name:", df$name, "<br/>",
  "Phones:", df$phones, "<br/>",
  "PaymentType:", df$paymentTypes, "<br/>",
  "PostCode", df$postalCode, "<br/>"
)

leaflet(df) %>%
  # Using the provider name
  addProviderTiles("Stamen.TonerLite") %>%
  addCircles(
    color = color_vote,
    lng = ~longitude,
    lat = ~latitude,
    popup = content,
    fillOpacity = 1,
    stroke = FALSE
  ) %>%
  # Formulas are evaluated against the data passed to leaflet(), so the
  # column is referenced directly instead of via df$.
  addLegend(pal = pal, values = ~R.D, title = "Won Party in 2016") %>%
  # The coordinates are given explicitly. Without them leaflet guesses the
  # columns and prints 'Assuming "longitude" and "latitude" are longitude
  # and latitude, respectively'.
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    clusterOptions = markerClusterOptions()
  )
# Make another graph (not useful, feel free to ignore it!)
# Store the converted column. The original call discarded its result, and
# the join below then warned about joining a factor with a character vector.
df$county_name <- as.character(df$county_name)

# install.packages("urbnmapr")
library(urbnmapr)
# Fetch the county map as an sf data frame (see the str() output below).
uscounties_sf <- get_urbn_map("counties", sf = TRUE)
str(uscounties_sf)
#> Classes ‘sf’ and 'data.frame': 3142 obs. of 7 variables:
#> $ county_fips: chr "04015" "12035" "20129" "28093" ...
#> $ state_abbv : chr "AZ" "FL" "KS" "MS" ...
#> $ state_fips : chr "04" "12" "20" "28" ...
#> $ county_name: chr "Mohave County" "Flagler County" "Morton County" "Marshall County" ...
#> $ fips_class : chr "H1" "H1" "H1" "H1" ...
#> $ state_name : chr "Arizona" "Florida" "Kansas" "Mississippi" ...
#> $ geometry :sfc_MULTIPOLYGON of length 3142; first list element: List of 1
#> ..$ :List of 1
#> .. ..$ : num [1:1023, 1:2] -1321573 -1321334 -1320642 -1319546 -1317819 ...
#> ..- attr(*, "class")= chr "XY" "MULTIPOLYGON" "sfg"
#> - attr(*, "sf_column")= chr "geometry"
#> - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA
#> ..- attr(*, "names")= chr "county_fips" "state_abbv" "state_fips" "county_name" ...

# NOTE(review): `counties` is never assigned at top level in the visible
# script. Presumably this is the `counties` dataset shipped with urbnmapr
# (with long/lat/group columns) -- confirm.
counties <- counties %>%
  filter(state_abbv == "NY")
counties

library(ggthemes)
# ggplot(), aes(), geom_polygon(), scale_fill_manual() and coord_map() come
# from ggplot2, which the script never attached explicitly.
library(ggplot2)

# NOTE(review): df has one row per restaurant, so this join repeats each
# county outline once per restaurant in that county.
elec_data <- inner_join(df, counties, by = "county_name")

# County map filled by the 2016 winning party.
winstatesmap <- ggplot(
  elec_data,
  aes(x = long, y = lat, group = group)
) +
  geom_polygon(aes(fill = R.D), color = "white") +
  scale_fill_manual(values = c("steel blue", "firebrick")) +
  theme_map() +
  coord_map(projection = "mercator")
winstatesmap